{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "ba4628a8",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "\n",
       "                <script type=\"application/javascript\" id=\"jupyter_black\">\n",
       "                (function() {\n",
       "                    if (window.IPython === undefined) {\n",
       "                        return\n",
       "                    }\n",
       "                    var msg = \"WARNING: it looks like you might have loaded \" +\n",
       "                        \"jupyter_black in a non-lab notebook with \" +\n",
       "                        \"`is_lab=True`. Please double check, and if \" +\n",
       "                        \"loading with `%load_ext` please review the README!\"\n",
       "                    console.log(msg)\n",
       "                    alert(msg)\n",
       "                })()\n",
       "                </script>\n",
       "                "
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Load the jupyter_black extension (Black-based formatting of code cells, not a linter)\n",
    "%load_ext jupyter_black"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6f8a3a00",
   "metadata": {},
   "source": [
    "For more examples and intuition, see [here](https://www.kaggle.com/code/yassinehamdaoui1/creating-tf-idf-model-from-scratch), [here](https://www.capitalone.com/tech/machine-learning/understanding-tf-idf/), and [here](https://melaniewalsh.github.io/Intro-Cultural-Analytics/05-Text-Analysis/03-TF-IDF-Scikit-Learn.html)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "7ab14f29",
   "metadata": {},
   "outputs": [],
   "source": [
    "import math\n",
    "\n",
    "# Process documents into individual words\n",
    "document_a = [\n",
    "    \"Hold\",\n",
    "    \"fast\",\n",
    "    \"to\",\n",
    "    \"dreams\",\n",
    "    \"for\",\n",
    "    \"if\",\n",
    "    \"dreams\",\n",
    "    \"die\",\n",
    "    \"life\",\n",
    "    \"is\",\n",
    "    \"a\",\n",
    "    \"broken-winged\",\n",
    "    \"bird\",\n",
    "    \"that\",\n",
    "    \"cannot\",\n",
    "    \"fly\",\n",
    "]\n",
    "document_b = [\n",
    "    \"No\",\n",
    "    \"bird\",\n",
    "    \"soars\",\n",
    "    \"too\",\n",
    "    \"high\",\n",
    "    \"if\",\n",
    "    \"he\",\n",
    "    \"soars\",\n",
    "    \"with\",\n",
    "    \"his\",\n",
    "    \"own\",\n",
    "    \"wings\",\n",
    "]\n",
    "\n",
    "# Total set of words\n",
    "total_corpus = set(document_a).union(set(document_b))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "149797a5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dreams</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>No</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Hold</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>for</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>die</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>he</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fast</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>his</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>life</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>that</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>wings</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>to</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>with</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cannot</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fly</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>soars</th>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>is</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>bird</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>if</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>own</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>high</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>broken-winged</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>too</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               0  1\n",
       "a              1  0\n",
       "dreams         2  0\n",
       "No             0  1\n",
       "Hold           1  0\n",
       "for            1  0\n",
       "die            1  0\n",
       "he             0  1\n",
       "fast           1  0\n",
       "his            0  1\n",
       "life           1  0\n",
       "that           1  0\n",
       "wings          0  1\n",
       "to             1  0\n",
       "with           0  1\n",
       "cannot         1  0\n",
       "fly            1  0\n",
       "soars          0  2\n",
       "is             1  0\n",
       "bird           1  1\n",
       "if             1  1\n",
       "own            0  1\n",
       "high           0  1\n",
       "broken-winged  1  0\n",
       "too            0  1"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Per-document word counts over the shared vocabulary (absent words stay at 0)\n",
    "import pandas as pd\n",
    "\n",
    "dict_a = {word: document_a.count(word) for word in total_corpus}\n",
    "dict_b = {word: document_b.count(word) for word in total_corpus}\n",
    "\n",
    "# One row per document; transpose so each word becomes a row\n",
    "frequency = pd.DataFrame([dict_a, dict_b])\n",
    "frequency.T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "4408b987",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>0.018814</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dreams</th>\n",
       "      <td>0.037629</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>No</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.025086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Hold</th>\n",
       "      <td>0.018814</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>for</th>\n",
       "      <td>0.018814</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>die</th>\n",
       "      <td>0.018814</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>he</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.025086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fast</th>\n",
       "      <td>0.018814</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>his</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.025086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>life</th>\n",
       "      <td>0.018814</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>that</th>\n",
       "      <td>0.018814</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>wings</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.025086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>to</th>\n",
       "      <td>0.018814</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>with</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.025086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cannot</th>\n",
       "      <td>0.018814</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fly</th>\n",
       "      <td>0.018814</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>soars</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.050172</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>is</th>\n",
       "      <td>0.018814</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>bird</th>\n",
       "      <td>0.018814</td>\n",
       "      <td>0.025086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>if</th>\n",
       "      <td>0.018814</td>\n",
       "      <td>0.025086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>own</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.025086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>high</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.025086</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>broken-winged</th>\n",
       "      <td>0.018814</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>too</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.025086</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      0         1\n",
       "a              0.018814  0.000000\n",
       "dreams         0.037629  0.000000\n",
       "No             0.000000  0.025086\n",
       "Hold           0.018814  0.000000\n",
       "for            0.018814  0.000000\n",
       "die            0.018814  0.000000\n",
       "he             0.000000  0.025086\n",
       "fast           0.018814  0.000000\n",
       "his            0.000000  0.025086\n",
       "life           0.018814  0.000000\n",
       "that           0.018814  0.000000\n",
       "wings          0.000000  0.025086\n",
       "to             0.018814  0.000000\n",
       "with           0.000000  0.025086\n",
       "cannot         0.018814  0.000000\n",
       "fly            0.018814  0.000000\n",
       "soars          0.000000  0.050172\n",
       "is             0.018814  0.000000\n",
       "bird           0.018814  0.025086\n",
       "if             0.018814  0.025086\n",
       "own            0.000000  0.025086\n",
       "high           0.000000  0.025086\n",
       "broken-winged  0.018814  0.000000\n",
       "too            0.000000  0.025086"
      ]
     },
     "execution_count": 57,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def tf(doc_dict: dict, doc_elements: list[str]) -> dict:\n",
    "    \"\"\"Term frequency of a word in a document  over total words in document\"\"\"\n",
    "    tf_dict = {}\n",
    "    corpus_count = len(doc_elements)\n",
    "    \n",
    "    for word, count in doc_dict.items():\n",
    "        tf_dict[word] = count / float(corpus_count)\n",
    "        \n",
    "    return tf_dict\n",
    "\n",
    "\n",
    "def idf(doc_list: list[dict[str, int]]) -> dict:\n",
    "    \"\"\"Given N documents (doc_list), the number of documents in which the term appears per a term\"\"\"\n",
    "    idf_dict = {}\n",
    "    N = len(doc_list)\n",
    "\n",
    "    idf_dict = dict.fromkeys(doc_list[0].keys(), 0)\n",
    "\n",
    "    for word in idf_dict.keys():\n",
    "        idf_dict[word] = sum(doc[word] > 0 for doc in doc_list)\n",
    "\n",
    "    for word, val in idf_dict.items():\n",
    "        idf_dict[word] = math.log10((N + 1.0) / (val + 1.0))\n",
    "\n",
    "    return idf_dict\n",
    "\n",
    "# Inverse document frequency of every vocabulary word, computed across both documents\n",
    "idfs = idf(doc_list=[dict_a, dict_b])\n",
    "\n",
    "\n",
    "def tfidf(doc_elements: dict[str, int], idfs: dict[str, int])-> dict:\n",
    "    \"\"\"TF * IDF per word given a single word in a single document and number of docs the term appears in\"\"\"\n",
    "    tfidf_dict = {}\n",
    "    \n",
    "    for word, val in doc_elements.items():\n",
    "        tfidf_dict[word] = val * idfs[word]\n",
    "\n",
    "    return tfidf_dict\n",
    "\n",
    "# Term frequencies, computed separately for each document\n",
    "tf_a, tf_b = tf(dict_a, document_a), tf(dict_b, document_b)\n",
    "\n",
    "# Weight each document's term frequencies by the corpus-wide inverse document frequencies\n",
    "tfidf_a, tfidf_b = tfidf(tf_a, idfs), tfidf(tf_b, idfs)\n",
    "\n",
    "# One row per document; transpose so each word becomes a row\n",
    "document_tfidf = pd.DataFrame([tfidf_a, tfidf_b])\n",
    "document_tfidf.T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "d368b9a6",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "\n",
    "corpus = [\n",
    "    \"Hold fast to dreams, for if dreams die, life is a broken-winged bird that cannot fly.\",\n",
    "    \"No bird soars too high if he soars with his own wings.\",\n",
    "]\n",
    "\n",
    "text_titles = [\"quote_langstonhughes\", \"quote_william_blake\"]\n",
    "\n",
    "# scikit-learn tokenises the raw text itself. With default settings it lower-cases,\n",
    "# keeps only tokens of two or more word characters (so \"a\" is dropped and\n",
    "# \"broken-winged\" becomes \"broken\" and \"winged\"), uses idf = ln((1 + n) / (1 + df)) + 1\n",
    "# and L2-normalises each row, so the scores differ from the manual calculation.\n",
    "vectorizer = TfidfVectorizer()\n",
    "vector = vectorizer.fit_transform(corpus)\n",
    "\n",
    "# One row per document, one column per term\n",
    "tfidf_df = pd.DataFrame(\n",
    "    vector.toarray(),\n",
    "    index=text_titles,\n",
    "    columns=vectorizer.get_feature_names_out(),\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "e74435ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Document frequency: number of documents in which each term has a non-zero weight.\n",
    "# Exclude a summary row left by an earlier run so re-running this cell gives the same counts.\n",
    "doc_rows = tfidf_df.drop(index=\"00_Document Frequency\", errors=\"ignore\")\n",
    "tfidf_df.loc[\"00_Document Frequency\"] = (doc_rows > 0).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "224355e4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>quote_langstonhughes</th>\n",
       "      <th>quote_william_blake</th>\n",
       "      <th>00_Document Frequency</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>bird</th>\n",
       "      <td>0.172503</td>\n",
       "      <td>0.197242</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>broken</th>\n",
       "      <td>0.242447</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cannot</th>\n",
       "      <td>0.242447</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>die</th>\n",
       "      <td>0.242447</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dreams</th>\n",
       "      <td>0.484893</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fast</th>\n",
       "      <td>0.242447</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fly</th>\n",
       "      <td>0.242447</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>for</th>\n",
       "      <td>0.242447</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>he</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.277217</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>high</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.277217</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>his</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.277217</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>hold</th>\n",
       "      <td>0.242447</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>if</th>\n",
       "      <td>0.172503</td>\n",
       "      <td>0.197242</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>is</th>\n",
       "      <td>0.242447</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>life</th>\n",
       "      <td>0.242447</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>no</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.277217</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>own</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.277217</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>soars</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.554434</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>that</th>\n",
       "      <td>0.242447</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>to</th>\n",
       "      <td>0.242447</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>too</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.277217</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>winged</th>\n",
       "      <td>0.242447</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>wings</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.277217</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>with</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.277217</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        quote_langstonhughes  quote_william_blake  00_Document Frequency\n",
       "bird                 0.172503             0.197242                    2.0\n",
       "broken               0.242447             0.000000                    1.0\n",
       "cannot               0.242447             0.000000                    1.0\n",
       "die                  0.242447             0.000000                    1.0\n",
       "dreams               0.484893             0.000000                    1.0\n",
       "fast                 0.242447             0.000000                    1.0\n",
       "fly                  0.242447             0.000000                    1.0\n",
       "for                  0.242447             0.000000                    1.0\n",
       "he                   0.000000             0.277217                    1.0\n",
       "high                 0.000000             0.277217                    1.0\n",
       "his                  0.000000             0.277217                    1.0\n",
       "hold                 0.242447             0.000000                    1.0\n",
       "if                   0.172503             0.197242                    2.0\n",
       "is                   0.242447             0.000000                    1.0\n",
       "life                 0.242447             0.000000                    1.0\n",
       "no                   0.000000             0.277217                    1.0\n",
       "own                  0.000000             0.277217                    1.0\n",
       "soars                0.000000             0.554434                    1.0\n",
       "that                 0.242447             0.000000                    1.0\n",
       "to                   0.242447             0.000000                    1.0\n",
       "too                  0.000000             0.277217                    1.0\n",
       "winged               0.242447             0.000000                    1.0\n",
       "wings                0.000000             0.277217                    1.0\n",
       "with                 0.000000             0.277217                    1.0"
      ]
     },
     "execution_count": 59,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Transpose for display: one row per term, one column per document plus the document-frequency row\n",
    "tfidf_df.T"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Raw Cell Format",
  "kernelspec": {
   "display_name": "embed",
   "language": "python",
   "name": "embed"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
